#Calculations for pressure (mean, median, mode IQR)
P_mean <- round(apply(predP, 1, mean), digits = 1); OUTPUTDATA$P_mean <- P_mean
P_median <- round(apply(predP, 1, median), digits = 1); OUTPUTDATA$P_median <- P_median
P_tab <- apply(round(predP, digits = 1), 1, table)
P_mode <- unlist(lapply(P_tab, function(x) {as.numeric(names(sort(x, decreasing = T)[1]))})); OUTPUTDATA$P_mode <- P_mode
P_IQR <- round(apply(predP, 1, IQR), digits = 1); OUTPUTDATA$P_IQR <- P_IQR
#Calculations for temperature (mean, median, mode IQR)
T_mean <- round(apply(predT, 1, mean), digits = 0); OUTPUTDATA$T_mean <- T_mean
T_median <- round(apply(predT, 1, median), digits = 0); OUTPUTDATA$T_median <- T_median
T_tab <- apply(round(predT, digits = 0), 1, table)
T_mode <- unlist(lapply(T_tab, function(x) {as.numeric(names(sort(x, decreasing = T)[1]))})); OUTPUTDATA$T_mode <- T_mode
T_IQR <- round(apply(predT, 1, IQR), digits = 0); OUTPUTDATA$T_IQR <- T_IQR
##
Output_alk_fil = OUTPUTDATA
#Create final output dataframe
write.csv(Output_alk_fil,'Output_alk_fil.csv')
save(Output_alk_fil, file = "Output_alk_fil.Rdata")
par(pty="s")
par(mfrow = c(1,1))
plot(Output_malktest$P_median,Output_malktest$P,pch=19, col=cl,xlim=c(0,30),
xlab="Predicted P (kbar)", ylab="True P (kbar)")
points(Output_maf_fil$P_median,Output_maf_fil$P,pch=18, col=cl)
points(Output_alk_fil$P_median,Output_alk_fil$P,pch=17, col=cl)
plot(Output_malktest$P_median,Output_malktest$P,pch=19, col=red,xlim=c(0,30),
xlab="Predicted P (kbar)", ylab="True P (kbar)")
points(Output_maf_fil$P_median,Output_maf_fil$P,pch=18, col=blue)
points(Output_alk_fil$P_median,Output_alk_fil$P,pch=17, col=green)
abline(1,1)
plot(Output_malktest$P_median,Output_malktest$P,pch=19, col="red",xlim=c(0,30),
xlab="Predicted P (kbar)", ylab="True P (kbar)")
points(Output_maf_fil$P_median,Output_maf_fil$P,pch=18, col="blue")
points(Output_alk_fil$P_median,Output_alk_fil$P,pch=17, col="green")
abline(1,1)
###### Filtering #####
######################
#Here we filter on the basis of Kd and cations
#we also filter for reasonable SiO2 and P ranges with enough datapoints (currently max 50 kbar)
#this spits out a file called input - this is what the calibration dataset will be
######################
setwd(paste(dirname(rstudioapi::getActiveDocumentContext()$path)))
#Load all data
load("raw.Rdata")
dat <- raw
#Make a final input.cats dataframe on which to perform the filtering
cats <- dat[,paste0(c("Si", "Al", "Ti", "Ca", "Na", "K", "Fe", "Mg", "Mn", "Cr", "Ni", "P"), ".cpx")]
#Make a column to decide if the experiment should be removed or not
dat$Rm <- "N"
#Remove obviously bad experiments. These should never be added to the model because they are poor
dat$Rm[which(is.na(apply(cats, 1, sum)))] <- "Y"
dat$Rm[which(apply(cats, 1, sum) > 4.04 | apply(cats, 1, sum) < 3.96)] <- "Y"
#Add a kd filter
upper.kd <- 0.68
lower.kd <- 0.04
dat$kd <- (dat$FeO.cpx/dat$MgO.cpx) / (dat$FeO.liq/dat$MgO.liq)
dat$Rm[which(is.na(dat$kd) == TRUE)] <- "Y"
dat$Rm[which(dat$kd > upper.kd | dat$kd < lower.kd)] <- "Y"
#Remove experiments that we decide are too deep for either crustal or mantle models
max.p <- 50
dat$Rm[which(dat$P > max.p)] <- "Y"
#Remove extremly low SiO2 liquids
dat$Rm[which(dat$SiO2.liq < 35)] <- "Y"
#maf_fil - remove high SiO2
dat$Rm[which(dat$SiO2.liq > 50)] <- "Y"
#_alk_fil - remove low alk
dat$nak = dat$Na2O.liq+dat$K2O.liq
dat$Rm[which(dat$nak < 5)] <- "Y"
#Remove experiments and set final file for input
input <- dat[which(dat$Rm == "N"),]
#Mix data (to avoid the bias linked to the organisation of the data with the most recent at the end of the matrix)
input<- input[sample(seq(1,nrow(input),1), nrow(input)),]
rownames(input) <- NULL
save(input, file = "input.Rdata")
########Grid Search#########
############################
#This script extracts the test dataset using a grid system to maintain
#uniform sampling across the PT space.
#We extract 200 test/train datasets - this is becasue the largest source of variation in the SEE
#is the extraction of the test/train dataset. So we repeated the model 200 times to get an
#average and robust SEE.
############################
setwd(paste(dirname(rstudioapi::getActiveDocumentContext()$path)))
options(scipen = 999)
'%!in%' <- function(x,y)!('%in%'(x,y))
mround <- function(x,base){
base*round(x/base)
}
#Load raw data
load("input.Rdata")
###### SELECT TEST DATASET#####
#Select the sample size for the number of test points
r <- 200                #This controls how many test/train datasets you can have is
n <- rep(1, times = r)
test.ids <- list()      #Make a list to save the testids
#Loop over the number of test points to get the ids for the rows
for(i in 1:length(n)) {
#Set number of test and train experiments
n.test<- floor(nrow(input)/n[i])
#Make a grid of P and T from which to sample the experiments and produce the test dataset
T.upper <- round(seq(from = min(input$T)-1, to = max(input$T)+1, length.out = ceiling(n.test^0.5)), digits = 0)[2:ceiling(n.test^0.5)]     #Upper T bounds for the grid
T.lower <- round(seq(from = min(input$T)-1, to = max(input$T)+1, length.out = ceiling(n.test^0.5)), digits = 0)[1:(ceiling(n.test^0.5)-1)] #Lower T bounds for the grid
P.upper <- round(seq(from = min(input$P)-0.1, to = max(input$P)+0.1, length.out = ceiling(n.test^0.5)), digits = 1)[2:ceiling(n.test^0.5)]     #Upper P bound
P.lower <- round(seq(from = min(input$P)-0.1, to = max(input$P)+0.1, length.out = ceiling(n.test^0.5)), digits = 1)[1:(ceiling(n.test^0.5)-1)] #Lower P bound
perms <- unique(expand.grid(data.frame(P.lower, T.lower)))  # This gives all the possible grid point combos for the lower points
perms$P.upper <- P.upper[match(perms$P.lower, P.lower)]     # Match upper bounds to the lower and add
perms$T.upper <- T.upper[match(perms$T.lower, T.lower)]     # Repeat for T
#Sample 1 experiment from each of the PT brackets
sam <- lapply(1:nrow(perms), function(x) which(input$P < perms$P.upper[x] & input$P >= perms$P.lower[x] & input$T < perms$T.upper[x] & input$T >= perms$T.lower[x])) #Creates each grid, which are defined by df perms
sam[sapply(sam, function(sam) length(sam)==0)] <- NA        # For sam if the length is 0 set as NA
samp <- lapply(1:length(sam), function(x) sample(sam[[x]], size = 1)) # Samples points from each gridspace
perms$samp <- unlist(samp) #Add sample to perms
#Find the number of experiments in each bracket
perms$n <- unlist(lapply(1:length(sam), function(x) length(which(is.na(sam[[x]]) == F))))
#Remove brackets from perm which don't have any experiments (or only have a single experiment)
no.perms <- which(is.na(perms$samp) | perms$n < 2)
perms <- perms[-no.perms,]
#N.test is the number of unique values of the perms id
test.ids[[i]] <- perms$samp
#End loop
}
##### FIND THE ID OF THE TRAIN IDS ####
train.ids <- lapply(1:length(test.ids), function(x) {sample(which(1:nrow(input) %!in% test.ids[[x]]), size = (nrow(input) - length(test.ids[[x]])), replace = F)})
#### SAVE TEST AND TRAIN IDS ####
save(test.ids, file = "testids.Rdata")
save(train.ids, file = "trainids.Rdata")
######################
#### DETERMINE SEE####
######################
#we recommend cleaning the global environment before running this code, sometimes if you don't you will get an error with the "times" argument at the end
#### INSTALL REQUIRED PACKAGES IF NECESSARY
pack1 <- suppressWarnings(require(PerformanceAnalytics))
if(pack1 == FALSE) {install.packages("PerformanceAnalytics")}
pack2 <- suppressWarnings(require(rJava))
if(pack2 == FALSE) {install.packages("rJava")}
pack3 <- suppressWarnings(require(extraTrees))
if(pack3 == FALSE) {install.packages("extraTrees")}
pack4 <- suppressWarnings(require(readxl))
if(pack4 == FALSE) {install.packages("readxl")}
pack5 <- suppressWarnings(require(EnvStats))
if(pack5 == FALSE) {install.packages("EnvStats")}
options(java.parameters = "-Xmx4g") #Note: you must run this at the beginning of you session otherwise there will be problems
setwd(paste(dirname(rstudioapi::getActiveDocumentContext()$path)))
options(scipen = 999)
'%!in%' <- function(x,y)!('%in%'(x,y))
######### READ DATA AND REMOVE FILTERED EXPERIMENTS #######
#Load raw data
load("input.Rdata")
##### LOAD IN TEST AND TRAIN IDS
load("testids.Rdata")
load("trainids.Rdata")
#### What do you have? ####
#YOU DECIDE#
liq <- c("NoLiquid") #OR NoLiquid
#Set the oxides
ox <- c("SiO2", "TiO2","Al2O3", "Cr2O3","FeO","MgO", "MnO", "CaO", "Na2O")
liqox <- c("SiO2", "TiO2","Al2O3", "FeO","MgO", "MnO", "CaO", "Na2O","K2O")
var <- paste0(ox,".cpx") #adds .cpx to the cpx oxides
if(liq == "Liquid") {var <- c(var, paste0(liqox,".liq"))} #adds .liq to the liquid variables
#Select hyperparamters
r <- 200                #Number of test/train splits
n.cuts <- 1             #number of random cuts
n.tree <- 201           #number of trees
m.try <-length(var)*2/3 #this should be 12 for liquid and 6 for no liquid.
#PRESSURE
#Make empty lists to populate
Pred_P_mean <- list(); Pred_P_median <- list(); Pred_P_mode <- list()
Pred_P_all <- list(); Resid_P_mean <- list(); Resid_P_median <- list(); Resid_P_mode <- list()
SEE_P_mean <- list(); SEE_P_median <- list(); SEE_P_mode <- list()
R2_P_mean <- list(); R2_P_median <- list(); R2_P_mode <- list(); model_P <- list()
P_IQR <- list()
for(j in 1:r) {
#Train the pressure model
model_P <- extraTrees(x = input[train.ids[[j]], var], y = input$P[train.ids[[j]]], ntree = n.tree, mtry = m.try, numRandomCuts = n.cuts, numThreads = 8) ; print(j)
#Add pause
# Sys.sleep(time = 0.1)
#Predict the pressure in the testset by taking the mean value
Pred_P_mean[[j]] <- round(apply(predict(model_P, newdata = input[test.ids[[j]], var], allValues = T), 1, mean), digits = 1)
#Predict the pressure in the testset by taking the median value
Pred_P_median[[j]] <- round(apply(predict(model_P, newdata = input[test.ids[[j]], var], allValues = T), 1, median), digits = 1)
#Predict the pressure in the testset by taking the modal value
Pred_P_all[[j]] <- round(predict(model_P, newdata = input[test.ids[[j]], var], allValues = T), digits = 1)
Pred_P_tab <- apply(round(predict(model_P, newdata = input[test.ids[[j]], var], allValues = T), digits = 1), 1, table)
Pred_P_mode[[j]] <- unlist(lapply(Pred_P_tab, function(x) {as.numeric(names(sort(x, decreasing = T)[1]))}))
#Calculate the IQR of the voting distribution
P_IQR[[j]] <- round(apply(predict(model_P, newdata = input[test.ids[[j]], var], allValues = T), 1, IQR), digits = 1)
#Calculate the pressure residual from each of the models
Resid_P_mean[[j]] <- input$P[test.ids[[j]]]-unlist(Pred_P_mean[[j]])
Resid_P_median[[j]] <- input$P[test.ids[[j]]]-unlist(Pred_P_median[[j]])
Resid_P_mode[[j]] <- input$P[test.ids[[j]]]-unlist(Pred_P_mode[[j]])
#Calculate the pressure SEE for each study
SEE_P_mean[[j]] <- round((sum((Pred_P_mean[[j]]-input$P[test.ids[[j]]])^2)/length(input$P[test.ids[[j]]]))^(0.5), digits = 2)
SEE_P_median[[j]] <- round((sum((Pred_P_median[[j]]-input$P[test.ids[[j]]])^2)/length(input$P[test.ids[[j]]]))^(0.5), digits = 2)
SEE_P_mode[[j]] <- round((sum((Pred_P_mode[[j]]-input$P[test.ids[[j]]])^2)/length(input$P[test.ids[[j]]]))^(0.5), digits = 2)
#Calculte the pressure R for each study
R2_P_mean[[j]] <- round(summary(lm(Pred_P_mean[[j]]~input$P[test.ids[[j]]]))$r.squared, digits = 3)
R2_P_median[[j]] <- round(summary(lm(Pred_P_median[[j]]~input$P[test.ids[[j]]]))$r.squared, digits = 3)
R2_P_mode[[j]] <- round(summary(lm(Pred_P_mode[[j]]~input$P[test.ids[[j]]]))$r.squared, digits = 3)
#Add pause
# Sys.sleep(time = 0.1)
rm(model_P)
}
#TEMPERATURE
#Make empty lists to populate
Pred_T_mean <- list(); Pred_T_median <- list(); Pred_T_mode <- list()
Pred_T_all <- list(); Resid_T_mean <- list(); Resid_T_median <- list(); Resid_T_mode <- list()
SEE_T_mean <- list(); SEE_T_median <- list(); SEE_T_mode <- list()
R2_T_mean <- list(); R2_T_median <- list(); R2_T_mode <- list(); model_T <- list()
T_IQR <- list()
for(j in 1:r) {
#Train the pressure model
model_T <- extraTrees(x = input[train.ids[[j]], var], y = input$T[train.ids[[j]]], ntree = n.tree, mtry = m.try, numRandomCuts = n.cuts, numThreads = 8) ; print(j)
#Add pause
# Sys.sleep(time = 0.1)
#Predict the pressure in the testset by taking the mean value
Pred_T_mean[[j]] <- round(apply(predict(model_T, newdata = input[test.ids[[j]], var], allValues = T), 1, mean), digits = 0)
#Predict the pressure in the testset by taking the median value
Pred_T_median[[j]] <- round(apply(predict(model_T, newdata = input[test.ids[[j]], var], allValues = T), 1, median), digits = 0)
#Predict the pressure in the testset by taking the modal value
Pred_T_all[[j]] <- round(predict(model_T, newdata = input[test.ids[[j]], var], allValues = T), digits = 0)
Pred_T_tab <- apply(round(predict(model_T, newdata = input[test.ids[[j]], var], allValues = T), digits = 0), 1, table)
Pred_T_mode[[j]] <- unlist(lapply(Pred_T_tab, function(x) {as.numeric(names(sort(x, decreasing = T)[1]))}))
#Calculate the IQR of the voting distribution
T_IQR[[j]] <- round(apply(predict(model_T, newdata = input[test.ids[[j]], var], allValues = T), 1, IQR), digits = 0)
#Calculate the pressure residual from each of the models
Resid_T_mean[[j]] <- input$T[test.ids[[j]]]-unlist(Pred_T_mean[[j]])
Resid_T_median[[j]] <- input$T[test.ids[[j]]]-unlist(Pred_T_median[[j]])
Resid_T_mode[[j]] <- input$T[test.ids[[j]]]-unlist(Pred_T_mode[[j]])
#Calculate the pressure SEE for each study
SEE_T_mean[[j]] <- round((sum((Pred_T_mean[[j]]-input$T[test.ids[[j]]])^2)/length(input$T[test.ids[[j]]]))^(0.5), digits = 0)
SEE_T_median[[j]] <- round((sum((Pred_T_median[[j]]-input$T[test.ids[[j]]])^2)/length(input$T[test.ids[[j]]]))^(0.5), digits = 0)
SEE_T_mode[[j]] <- round((sum((Pred_T_mode[[j]]-input$T[test.ids[[j]]])^2)/length(input$T[test.ids[[j]]]))^(0.5), digits = 0)
#Calculte the pressure R for each study
R2_T_mean[[j]] <- round(summary(lm(Pred_T_mean[[j]]~input$T[test.ids[[j]]]))$r.squared, digits = 3)
R2_T_median[[j]] <- round(summary(lm(Pred_T_median[[j]]~input$T[test.ids[[j]]]))$r.squared, digits = 3)
R2_T_mode[[j]] <- round(summary(lm(Pred_T_mode[[j]]~input$T[test.ids[[j]]]))$r.squared, digits = 3)
#Add pause
# Sys.sleep(time = 0.1)
rm(model_T)
}
#### SAVE CATION DATA #####
output <- input[unlist(test.ids),]
output$r <- rep(1:r, times = lapply(test.ids, length))
#Add pressure data
output$Pred.P.mean <- unlist(Pred_P_mean)
output$Pred.P.med <- unlist(Pred_P_median)
output$Pred.P.mode <- unlist(Pred_P_mode)
output$Resid.P.mean <- unlist(Resid_P_mean)
output$Resid.P.med <- unlist(Resid_P_median)
output$Resid.P.mode <- unlist(Resid_P_mode)
output$SEE.P.mean <- rep(unlist(SEE_P_mean), times = lapply(test.ids, length))
output$SEE.P.med <- rep(unlist(SEE_P_median), times = lapply(test.ids, length))
output$SEE.P.mode <- rep(unlist(SEE_P_mode), times = lapply(test.ids, length))
output$R2.P.mean <- rep(unlist(R2_P_mean), times = lapply(test.ids, length))
output$R2.P.med <- rep(unlist(R2_P_median), times = lapply(test.ids, length))
output$R2.P.mode <- rep(unlist(R2_P_mode), times = lapply(test.ids, length))
#Add temperature data
output$Pred.T.mean <- unlist(Pred_T_mean)
output$Pred.T.med <- unlist(Pred_T_median)
output$Pred.T.mode <- unlist(Pred_T_mode)
output$Resid.T.mean <- unlist(Resid_T_mean)
output$Resid.T.med <- unlist(Resid_T_median)
output$Resid.T.mode <- unlist(Resid_T_mode)
output$SEE.T.mean <- rep(unlist(SEE_T_mean), times = lapply(test.ids, length))
output$SEE.T.med <- rep(unlist(SEE_T_median), times = lapply(test.ids, length))
output$SEE.T.mode <- rep(unlist(SEE_T_mode), times = lapply(test.ids, length))
output$R2.T.mean <- rep(unlist(R2_T_mean), times = lapply(test.ids, length))
output$R2.T.med <- rep(unlist(R2_T_median), times = lapply(test.ids, length))
output$R2.T.mode <- rep(unlist(R2_T_mode), times = lapply(test.ids, length))
final_malk_fil <- output
save(final_malk_fil, file = "final_malk_fil.Rdata")
#### Calculate the SEE ####
mean(unlist(SEE_P_mean))
mean(unlist(SEE_T_mean))
###############################
#### MAKE THE ACTUAL MODEL ####
###############################
#### INSTALL REQUIRED PACKAGES IF NECESSARY
pack1 <- suppressWarnings(require(PerformanceAnalytics))
if(pack1 == FALSE) {install.packages("PerformanceAnalytics")}
pack2 <- suppressWarnings(require(rJava))
if(pack2 == FALSE) {install.packages("rJava")}
pack3 <- suppressWarnings(require(extraTrees))
if(pack3 == FALSE) {install.packages("extraTrees")}
pack4 <- suppressWarnings(require(EnvStats))
if(pack4 == FALSE) {install.packages("EnvStats")}
options(java.parameters = "-Xmx4g") #Note: you must run this at the beginning of you session otherwise it'll suck
setwd(paste(dirname(rstudioapi::getActiveDocumentContext()$path)))
options(scipen = 999)
'%!in%' <- function(x,y)!('%in%'(x,y))
######### READ DATA AND REMOVE FILTERED EXPERIMENTS #######
#Load raw data
load("input.Rdata")
#### What do you have? ####
#SAME AS #4 or else the SEE you calculated is incorrect!
liq <- c("NoLiquid") #OR Liquid
#Set the oxides
ox <- c("SiO2", "TiO2","Al2O3", "Cr2O3","FeO","MgO", "MnO", "CaO", "Na2O") #Make sure this order stays the same!!
liqox <- c("SiO2", "TiO2","Al2O3", "FeO","MgO", "MnO", "CaO", "Na2O","K2O")
var <- paste0(ox,".cpx") #adds .cpx to the cpx oxides
if(liq == "Liquid") {var <- c(var, paste0(liqox,".liq"))} #adds .liq to the liquid variables
#Hyperparamters
r <- 200                #Number of test/train splits
n.cuts <- 1             #number of random cuts
n.tree <- 201           #number of trees
m.try <-length(var)*2/3 #this should be 12 for liquid and 6 for no liquid.
P_C_malk_fil <- extraTrees(x = input[, var], y = input$P, ntree = n.tree, mtry = m.try, numRandomCuts = n.cuts, numThreads = 8)
#Train temperature model
T_C_malk_fil <- extraTrees(x = input[, var], y = input$T, ntree = n.tree, mtry = m.try, numRandomCuts = n.cuts, numThreads = 8)
#Save models
prepareForSave(P_C_malk_fil)
save(P_C_malk_fil, file = "P_C_malk_fil.Rdata")
prepareForSave(T_C_malk_fil)
save(T_C_malk_fil, file = "T_C_malk_fil.Rdata")
########################
#####Run the model######
########################
#Load your data in! The SEE for this is what you calculated in #4
#######################
#### INSTALL REQUIRED PACKAGES IF NECESSARY
pack1 <- suppressWarnings(require(PerformanceAnalytics))
if(pack1 == FALSE) {install.packages("PerformanceAnalytics")}
pack2 <- suppressWarnings(require(rJava))
if(pack2 == FALSE) {install.packages("rJava")}
pack3 <- suppressWarnings(require(extraTrees))
if(pack3 == FALSE) {install.packages("extraTrees")}
pack4 <- suppressWarnings(require(readxl))
if(pack4 == FALSE) {install.packages("readxl")}
pack5 <- suppressWarnings(require(EnvStats))
if(pack5 == FALSE) {install.packages("EnvStats")}
options(java.parameters = "-Xmx4g") #Note: you must run this at the beginning of you session otherwise it'll suck
setwd(paste(dirname(rstudioapi::getActiveDocumentContext()$path)))
####load models####
load("P_C_malk_fil.Rdata")
load("T_C_malk_fil.Rdata")
#### What do you have? ####
#SAME AS #4 or else the SEE you calculated is incorrect!
liq <- c("NoLiquid") #OR Liquid
#Load input data
load(file = "YOURDATA.Rdata")
YOURDATA <- input.user
####Load in your data####
#Filter your data using sheet 1 and 2 before this step.
INPUTDATA <- YOURDATA[,c("SiO2.cpx","TiO2.cpx","Al2O3.cpx","Cr2O3.cpx","FeO.cpx","MgO.cpx","MnO.cpx","CaO.cpx","Na2O.cpx")] #make sure the elements are the same and they have the same naming convention
if(liq== "Liquid"){INPUTDATA <- YOURDATA[,c("SiO2.cpx","TiO2.cpx","Al2O3.cpx","Cr2O3.cpx","FeO.cpx","MgO.cpx","MnO.cpx","CaO.cpx","Na2O.cpx","SiO2.liq", "TiO2.liq","Al2O3.liq", "FeO.liq","MgO.liq", "MnO.liq", "CaO.liq", "Na2O.liq","K2O.liq")]}
####Run the models ####
predP <- predict(P_C_malk_fil, newdata = INPUTDATA, allValues=TRUE) #this applies the model to your data
predT <- predict(T_C_malk_fil, newdata = INPUTDATA, allValues=TRUE) #this applies the model to your data
#Create dataframe to save results
OUTPUTDATA <- YOURDATA
#Calculations for pressure (mean, median, mode IQR)
P_mean <- round(apply(predP, 1, mean), digits = 1); OUTPUTDATA$P_mean <- P_mean
P_median <- round(apply(predP, 1, median), digits = 1); OUTPUTDATA$P_median <- P_median
P_tab <- apply(round(predP, digits = 1), 1, table)
P_mode <- unlist(lapply(P_tab, function(x) {as.numeric(names(sort(x, decreasing = T)[1]))})); OUTPUTDATA$P_mode <- P_mode
P_IQR <- round(apply(predP, 1, IQR), digits = 1); OUTPUTDATA$P_IQR <- P_IQR
#Calculations for temperature (mean, median, mode IQR)
T_mean <- round(apply(predT, 1, mean), digits = 0); OUTPUTDATA$T_mean <- T_mean
T_median <- round(apply(predT, 1, median), digits = 0); OUTPUTDATA$T_median <- T_median
T_tab <- apply(round(predT, digits = 0), 1, table)
T_mode <- unlist(lapply(T_tab, function(x) {as.numeric(names(sort(x, decreasing = T)[1]))})); OUTPUTDATA$T_mode <- T_mode
T_IQR <- round(apply(predT, 1, IQR), digits = 0); OUTPUTDATA$T_IQR <- T_IQR
##
Output_malk_fil = OUTPUTDATA
#Create final output dataframe
write.csv(Output_malk_fil,'Output_malk_fil.csv')
save(Output_malk_fil, file = "Output_malk_fil.Rdata")
plot(Output_malktest$P_median,Output_malktest$P,pch=19, col="red",xlim=c(0,30),
xlab="Predicted P (kbar)", ylab="True P (kbar)")
points(Output_maf_fil$P_median,Output_maf_fil$P,pch=18, col="blue")
points(Output_alk_fil$P_median,Output_alk_fil$P,pch=17, col="green")
points(Output_malk_fil$P_median,Output_malk_fil$P,pch=17, col="orange")
abline(1,1)
View(Output_malktest)
par(pty="s")
par(mfrow = c(1,1))
plot(Output_malktest$P_median,Output_malktest$P,pch=19, col=cl,xlim=c(0,30),
xlab="Predicted P (kbar)", ylab="True P (kbar)")
points(Output_maf_fil$P_median,Output_maf_fil$P,pch=18, col=cl)
points(Output_alk_fil$P_median,Output_alk_fil$P,pch=17, col=cl)
points(Output_alk_fil$P_median,Output_alk_fil$P,pch=16, col=cl)
points(Output_malk_fil$P_median,Output_malk_fil$P,pch=15, col=cl)
abline(1,1)
r2_malktest = summary(lm(Output_malktest$P_median~Output_malktest$P))$r.squared
r2_maf_fill = summary(lm(Output_maf_fil$P_median~Output_maf_fil$P))$r.squared
r2_malktest = summary(lm(P_median~P, data = Output_malktest))$r.squared
r2_malktest = summary(lm(P_median~P, data = Output_malktest))$r.squared
r2_malktest = summary(lm(P~P_median, data = Output_malktest))$r.squared
r2_maf_fil = summary(lm(P_median~P, data = Output_maf_fil))$r.squared
r2_malktest = summary(lm(P_median~P, data = Output_malktest))$r.squared
r2_maf_fil  = summary(lm(P_median~P, data = Output_maf_fil))$r.squared
r2_alk_fil  = summary(lm(P_median~P, data = Output_alk_fil))$r.squared
r2_malk_fil = summary(lm(P_median~P, data = Output_malk_fil))$r.squared
rm(r2_malk_fill)
rm(r2_maf_fill)
P_malktest = Output_malktest[which(Output_malktest$P_IQR =< 5),]
P_malktest = Output_malktest[which(Output_malktest$P_IQR <= 5),]
r2_IQRfil_malktest = summary(lm(P_median~P, data = P_malktest))$r.squared
plot(P_malktest$P_median,P_malktest$P)
plot(Output_malktest$P_median,Output_malktest$P,pch=19, col=cl,xlim=c(0,30),
xlab="Predicted P (kbar)", ylab="True P (kbar)")
points(P_malktest$P_median,P_malktest$P)
abline(1,1)
summary(lm(P~P_median, data = P_malktest))$r.squared
summary(lm(P_median~P, data = Output_malktest))$r.squared
########################
#####PLUG AND PLAY######
########################
# written by Corin Jorgenson and Oliver Higgins 2021 #
#################################
####### USER INPUT REQUIRED #####
#################################
#### CHOOSE WHICH MODEL YOU WOULD LIKE TO RUN####
liq <- "NoLiquid"
# liq <- "Liquid"
###########################################
####### DO NOT EDIT BEYOND THIS POINT #####
###########################################
#this model has a SEE's precalculated
#you can only modify if you have liquid data or not
#NOTE: you cannot have a dataset with the liquid and no liquid model and use both models you must choose one model
#### INSTALL REQUIRED PACKAGES IF NECESSARY
pack1 <- suppressWarnings(require(PerformanceAnalytics))
if(pack1 == FALSE) {install.packages("PerformanceAnalytics")}
pack2 <- suppressWarnings(require(rJava))
if(pack2 == FALSE) {install.packages("rJava")}
pack3 <- suppressWarnings(require(extraTrees))
if(pack3 == FALSE) {install.packages("extraTrees")}
pack4 <- suppressWarnings(require(readxl))
if(pack4 == FALSE) {install.packages("readxl")}
pack5 <- suppressWarnings(require(EnvStats))
if(pack5 == FALSE) {install.packages("EnvStats")}
options(java.parameters = "-Xmx4g") #Note: you must run this at the beginning of you session otherwise it'll suck
setwd(paste(dirname(rstudioapi::getActiveDocumentContext()$path)))
####Load models###
load("P_C_liq.Rdata")  #SEE = 3.44 kbar
load("T_C_liq.Rdata")  #SEE = 45.57 deg C
load("P_C_noliq.Rdata")#SEE = 4.57 kbar
load("T_C_noliq.Rdata")#SEE = 79.78 deg C
#### LOAD INPUT DATA####
YOURDATA <- read.csv("InputDataTEST.csv") #Change "Userdata.txt" to whatever data you have
#Note: your data should have the same elements as our model and be in the same naming convention (.cpx and .liq suffix)
#CPX DATA: SiO2.cpx,TiO2.cpx,Al2O3.cpx,Cr2O3.cpx,FeO.cpx,MgO.cpx,MnO.cpx,CaO.cpx,Na2O.cpx
#corresponding liquid data should be in the same row as the cpx pair
#LIQ DATA: SiO2.liq,TiO2.liq,Al2O3.liq,FeO.liq,MgO.liq,MnO.liq,CaO.liq,Na2O.liq,K2O.liq
####Define input data####
if(liq == "NoLiquid") {INPUTDATA <- YOURDATA[,c("SiO2.cpx","TiO2.cpx","Al2O3.cpx","Cr2O3.cpx","FeO.cpx","MgO.cpx","MnO.cpx","CaO.cpx","Na2O.cpx")]}
if(liq == "Liquid")   {INPUTDATA <- YOURDATA[,c("SiO2.cpx","TiO2.cpx","Al2O3.cpx","Cr2O3.cpx","FeO.cpx","MgO.cpx","MnO.cpx","CaO.cpx","Na2O.cpx","SiO2.liq", "TiO2.liq","Al2O3.liq", "FeO.liq","MgO.liq", "MnO.liq", "CaO.liq", "Na2O.liq","K2O.liq")]}
####Run the model####
if(liq == "Liquid")   {predP <- predict(P_C_liq, newdata = INPUTDATA, allValues=TRUE)}
if(liq == "Liquid")   {predT <- predict(T_C_liq, newdata = INPUTDATA, allValues=TRUE)}
if(liq == "NoLiquid") {predP <- predict(P_C_noliq, newdata = INPUTDATA, allValues=TRUE)}
if(liq == "NoLiquid") {predT <- predict(T_C_noliq, newdata = INPUTDATA, allValues=TRUE)}
## extract values and calculate iqr
P_values <- apply(predP,1,median)
T_values <- apply(predT,1,median)
P_IQR <- apply(predP,1,IQR)
T_IQR <- apply(predT,1,IQR)
View(INPUTDATA)
View(YOURDATA)
########################
#####PLUG AND PLAY######
########################
# written by Corin Jorgenson and Oliver Higgins 2021 #
#################################
####### USER INPUT REQUIRED #####
#################################
#### CHOOSE WHICH MODEL YOU WOULD LIKE TO RUN####
liq <- "NoLiquid"
# liq <- "Liquid"
###########################################
####### DO NOT EDIT BEYOND THIS POINT #####
###########################################
#this model has a SEE's precalculated
#you can only modify if you have liquid data or not
#NOTE: you cannot have a dataset with the liquid and no liquid model and use both models you must choose one model
#### INSTALL REQUIRED PACKAGES IF NECESSARY
pack1 <- suppressWarnings(require(PerformanceAnalytics))
if(pack1 == FALSE) {install.packages("PerformanceAnalytics")}
pack2 <- suppressWarnings(require(rJava))
if(pack2 == FALSE) {install.packages("rJava")}
pack3 <- suppressWarnings(require(extraTrees))
if(pack3 == FALSE) {install.packages("extraTrees")}
pack4 <- suppressWarnings(require(readxl))
if(pack4 == FALSE) {install.packages("readxl")}
pack5 <- suppressWarnings(require(EnvStats))
if(pack5 == FALSE) {install.packages("EnvStats")}
options(java.parameters = "-Xmx4g") #Note: you must run this at the beginning of you session otherwise it'll suck
setwd(paste(dirname(rstudioapi::getActiveDocumentContext()$path)))
####Load models###
load("P_C_liq.Rdata")  #SEE = 3.44 kbar
load("T_C_liq.Rdata")  #SEE = 45.57 deg C
load("P_C_noliq.Rdata")#SEE = 4.57 kbar
load("T_C_noliq.Rdata")#SEE = 79.78 deg C
#### LOAD INPUT DATA####
YOURDATA <- read.csv("InputDataTEST.csv") #Change "Userdata.txt" to whatever data you have
#Note: your data should have the same elements as our model and be in the same naming convention (.cpx and .liq suffix)
#CPX DATA: SiO2.cpx,TiO2.cpx,Al2O3.cpx,Cr2O3.cpx,FeO.cpx,MgO.cpx,MnO.cpx,CaO.cpx,Na2O.cpx
#corresponding liquid data should be in the same row as the cpx pair
#LIQ DATA: SiO2.liq,TiO2.liq,Al2O3.liq,FeO.liq,MgO.liq,MnO.liq,CaO.liq,Na2O.liq,K2O.liq
####Define input data####
if(liq == "NoLiquid") {INPUTDATA <- YOURDATA[,c("SiO2.cpx","TiO2.cpx","Al2O3.cpx","Cr2O3.cpx","FeO.cpx","MgO.cpx","MnO.cpx","CaO.cpx","Na2O.cpx")]}
if(liq == "Liquid")   {INPUTDATA <- YOURDATA[,c("SiO2.cpx","TiO2.cpx","Al2O3.cpx","Cr2O3.cpx","FeO.cpx","MgO.cpx","MnO.cpx","CaO.cpx","Na2O.cpx","SiO2.liq", "TiO2.liq","Al2O3.liq", "FeO.liq","MgO.liq", "MnO.liq", "CaO.liq", "Na2O.liq","K2O.liq")]}
####Run the model####
if(liq == "Liquid")   {predP <- predict(P_C_liq, newdata = INPUTDATA, allValues=TRUE)}
if(liq == "Liquid")   {predT <- predict(T_C_liq, newdata = INPUTDATA, allValues=TRUE)}
if(liq == "NoLiquid") {predP <- predict(P_C_noliq, newdata = INPUTDATA, allValues=TRUE)}
if(liq == "NoLiquid") {predT <- predict(T_C_noliq, newdata = INPUTDATA, allValues=TRUE)}
## extract values and calculate iqr
P_values <- apply(predP,1,median)
T_values <- apply(predT,1,median)
P_IQR <- apply(predP,1,IQR)
T_IQR <- apply(predT,1,IQR)
#
